
library(tidyverse)
library(doParallel)
library(foreach)

## create helper functions to generate the Monte Carlo data ----

# a function to generate res
make_re <- function(n_issues, var = c(0.05, 0.05), cor = 0) {
  cov_mat <- matrix(c(var[1], cor*sqrt(var[1])*sqrt(var[2]),
                      cor*sqrt(var[1])*sqrt(var[2]), var[2]),
                    nrow = 2, ncol = 2)
  re <- mvtnorm::rmvnorm(n = n_issues, mean = c(0, 0), sigma = cov_mat)
  colnames(re) <- c("r_int", "r_trt")
  re_df <- as_tibble(re) %>%
    mutate(issue_id = 1:n_issues)
  return(re_df)
}

make_re(5)

# a function to generate regression residuals
make_residuals <- function(n_respondents, var = 3.5) {
  r <- tibble(r = rnorm(n_respondents, 0, sd = sqrt(var)), 
              respondent_id = 1:n_respondents)
  return(r)
}

make_residuals(500)

# a function to generate the observed explanatory varaibles
make_data <- function(n_respondents, n_issues) {
  data <- tibble(respondent_id = 1:n_respondents,
                 issue_id = rep(sample(1:n_issues, size = n_issues), length.out = n_respondents)) %>%
    group_by(issue_id) %>%
    mutate(treatment = rep(sample(c(-0.5, 0.5), size = 2), length.out = n())) %>%
    ungroup()
  return(data)
}

d <- make_data(1000, 50)

with(d, table(issue_id, treatment))

## do the single-topic simulations ----

# set the single-topic parameters
n_mc_sims <- 7000
log_n <- seq(log10(200), log10(2000), length.out = 10)

n1 <- round((10^log_n)/10)*10; n1
sim_par <- crossing(n_respondents = n1,
                      sigma2_trt = c(0.10, 0.20, 0.40)^2,   # roughly from our data
                      n_topics = c(5, 10, 25, 50, 100),         # roughly from our data
                      mu_trt = 0) %>%
  filter(n_respondents/n_topics >= 4) %>%
  #filter(n_topics == 25, sigma2_trt == 0.2^2) %>%
  mutate(par_id = 1:n()) %>%
  glimpse()

# do the simulations in parallel
# > runif(1)
# [1] 0.7661655
#set.seed(7661655)
cl <- makeCluster(detectCores(), outfile = "simulation-console-output.log")
registerDoParallel(cl)
start_time <- Sys.time()
cat(paste0("\nAbout to start simulations... woo hoo! It's ", start_time, ".\n\n"), file = "ss-progress.log")
pkg <- c("tidyverse", "blme")
start_ii_time <- Sys.time()
sims_list <- foreach(ii = 1:nrow(sim_par), .packages = pkg) %dopar% {
  #set.seed(7661654 + ii)
  # save start time
  start_i_time <- Sys.time()
  ret1 <- ret20 <- ret20b <- NULL
  for (i in 1:n_mc_sims) {
    re1 <- make_re(1, var = c(0.05, 0.05))
    residuals <- make_residuals(sim_par$n_respondents[ii])
    data1 <- make_data(sim_par$n_respondents[ii], n_issues = 1) %>%
      left_join(re1, by = "issue_id") %>%
      left_join(residuals, by = "respondent_id") %>%
      mutate(y = 0 + r_int + (sim_par$mu_trt[ii] + r_trt)*treatment + r) 
    
    fit1 <- lm(y ~ treatment, data = data1)
    
    ret1[[i]] <- tibble(est = coef(fit1)["treatment"],
                        se = sqrt(diag(vcov(fit1)))["treatment"]) %>%
      mutate(method = "Single-Topic Study",
             par_id = ii, 
             true = re1$r_trt[1])

    
    # generate res and residuals 
    re20 <- make_re(sim_par$n_topics[ii], var = c(0.05, sim_par$sigma2_trt[ii]))
    residuals <- make_residuals(sim_par$n_respondents[ii])
    data20 <- make_data(sim_par$n_respondents[ii], n_issues = sim_par$n_topics[ii]) %>%
      left_join(re20, by = "issue_id") %>%
      left_join(residuals, by = "respondent_id") %>% 
      mutate(y = 0 + r_int + (sim_par$mu_trt[ii] + r_trt)*treatment + r) 

    fit20 <- blmer(y ~ treatment + (1 + treatment | issue_id), data = data20, 
                  control = lmerControl(optimizer = "Nelder_Mead"))
                  #control = lmerControl(optimizer = "nloptwrap"))
    
    
    ret20[[i]] <- tibble(est = fixef(fit20)["treatment"],
                        se = sqrt(diag(vcov(fit20)))["treatment"],
                        true = sim_par$mu_trt[ii]) %>%
      mutate(method = "Topic-Sampling Study (Overall Effect)",
             par_id = ii)
    
    # # generate res and residuals
    # sim <- arm::sim(fit20)
    # re <- ranef(sim)$issue_id[,,  "treatment"]
    # fe <- fixef(sim)[, "treatment"]
    # comb <- re + fe
    # est <- apply(comb, 2, mean)
    # se <- apply(comb, 2, sd)
    # 
    # ret20b[[i]] <- tibble(est = est,
    #                      se = se, 
    #                      topic_id = 1:length(est)) %>%
    #   mutate(method = "Topic-Sampling Study (Particular Effect)",
    #          par_id = ii)
  }
  # update progress file
  end_i_time <- Sys.time()
  cat(paste0("\nDone with parameter combo ", ii, " of ", nrow(sim_par), ". Started at ", start_i_time, "; finished at ", end_i_time, 
             ". That took ", round(difftime(end_i_time, start_i_time, units = "secs"), 2), " seconds."), 
      file = "ss-progress.log", append = TRUE)
  bind_rows(ret1, ret20) #%>%
    #bind_rows(ret20b)
}
# update progress file
end_ii_time <- Sys.time()
cat(paste0("\n\nDone! Started at ", start_ii_time, "; finished at ", end_ii_time, 
           ". That took ", round(difftime(end_ii_time, start_ii_time, units = "secs"), 2), " seconds."), 
    file = "ss-progress.log", append = TRUE)
stopCluster(cl)

res <- bind_rows(sims_list) %>%
  mutate(method = factor(method)) %>%
  left_join(sim_par) %>%
  write_csv("output/sample-size-simulations.csv") %>%
  glimpse()
